import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import pandas as pd
from matplotlib.font_manager import FontProperties
import statistics

# Read one integer per record from the tab-separated input file: the value is
# taken from the second column (index 1) of each line.
# The `with` block guarantees the handle is closed even if a line fails to
# parse, and whitespace-only lines (e.g. a trailing blank line) are skipped
# instead of raising IndexError on the column lookup.
with open('correct_unique_gRNAs.txt', 'r') as f1:
    iden = [int(line.split('\t')[1]) for line in f1 if line.strip()]

# Report the upper and lower quartiles of the raw values (upper first).
q1, q3 = np.percentile(iden, [25, 75])
print(q3, q1)

# Clip every value at 5000 so the long right tail collapses onto the cap;
# this clipped copy is what gets plotted further down.
dock_iden = [min(value, 5000) for value in iden]

# Summary statistics are computed on the unclipped values.
largest = max(iden)
smallest = min(iden)
spread = statistics.stdev(iden)
average = statistics.mean(iden)
print(largest, smallest, spread, average)

# Plot a kernel density estimate of the clipped values and save it as a PDF.
s = pd.Series(dock_iden)

# Small square canvas; the margins squeeze the axes into a narrow central
# column (30% of the figure width).
fig, ax = plt.subplots(figsize=(4, 4))
fig.subplots_adjust(left=0.35, bottom=0.1, right=0.65, top=0.9)

# Draw onto the axes created above explicitly rather than relying on
# pyplot's implicit "current axes".
den = s.plot(
    kind='kde',
    ax=ax,
    xlim=(0, 5000),
    ylim=(0, 0.0021),
    color='black',
    linewidth=1,
)

# Recover the (x, y) samples of the KDE curve just drawn so the region it
# outlines can be shaded.
kde_x, kde_y = den.get_lines()[0].get_data()
ax.fill(kde_x, kde_y, facecolor='#FF8247', alpha=0.7)

# Save before show(): the save must not depend on the state of the figure
# after the interactive window is closed.
fig.savefig('NGS_plasmid_correct.pdf', format='pdf')
plt.show()